import os
from tqdm import tqdm
import json
from image_synthesis.data.utils.tsv_file import TSVFile, CompositeTSVFile, tsv_writer

def gen_rows(tsvfile):
    """Yield viewer rows derived from the JSON annotations in a TSV file.

    Every row of the input is unpacked as ``(filename, anno)``, where
    ``anno`` is parsed as JSON. From the parsed object this reads the first
    element of ``captions``, the ``title`` (falling back to ``'unknown'``
    when the key is missing) and the ``url``.

    Args:
        tsvfile: Value passed straight to ``TSVFile``; the caller in this
            file supplies a filesystem path.

    Yields:
        ``(filename, new_anno, url)`` tuples. ``new_anno`` is a JSON string
        encoding a list of three single-key dicts: ``class`` (always
        ``'clipart'``), ``caption`` and ``title``.

    Raises:
        KeyError: If an annotation has no ``captions`` or ``url`` key.
        IndexError: If ``captions`` is empty.
    """
    rows = TSVFile(tsvfile)
    for row_index in range(len(rows)):
        filename, raw_anno = rows[row_index]
        record = json.loads(raw_anno)

        # Lookup order matters for which error surfaces first on bad rows.
        caption = record['captions'][0]
        title = record.get('title', 'unknown')
        url = record['url']

        payload = [
            {'class': 'clipart'},
            {'caption': '%s' % caption},  # force the caption to text
            {'title': title},
        ]
        yield filename, json.dumps(payload), url

# Input folder with the filtered per-dataset TSVs and the output root where
# one '<dataset>_landscape/train.tsv' is written per dataset.
indir = '/mnt2/datasets/tophost-landscape/'
outdir = '/mnt/code/tsvviewer/data/'
dataset_names = ['alamy', 'featurepics', 'freepik', 'istockphoto']
# dataset_names = ['dreamstime']

for ds in dataset_names:
    print('prepare view tsv for  %s===>' % ds)

    # Source: <indir>/filtered_<dataset>_text_combined.tsv
    tsv_file_path = os.path.join(indir, 'filtered_%s_text_combined.tsv' % ds)

    # Destination folder is created on demand; an existing one is reused.
    ds_outdir = os.path.join(outdir, ds + '_landscape')
    os.makedirs(ds_outdir, exist_ok=True)
    out_tsv_file = os.path.join(ds_outdir, 'train.tsv')

    # gen_rows is a generator; it is handed to tsv_writer unconsumed.
    tsv_writer(gen_rows(tsv_file_path), out_tsv_file)

    